Exploratory Data Analysis - nmdat1.csv

.libPaths("~/renv/renv/library/R-4.5/aarch64-apple-darwin20/")
library(pmplots)
library(pmtables)
library(yspec)
library(dplyr)
library(here)
library(ggplot2)
library(data.table)
library(lastdose)
library(yspec)
library(patchwork)
full <- fread(here("model1/data/nmdat1.csv"), na.strings = ".")

The data spec will help us format tables and figures

spec <- ys_load(here("wk02/nmdat1.yaml"))

table <- ys_get_short_unit(spec)
full <- ys_factors(full, spec, GENO, SEX, DOSE)

Calculate time after last dose

full <- lastdose(full)

head(full)
        C   NUM    ID  TIME   AMT   CMT  EVID      DV   BLQ  LLOQ   AGE    SEX
   <lgcl> <int> <int> <num> <int> <int> <int>   <num> <int> <int> <int> <fctr>
1:     NA     1     1  0.00    50     1     1    0.00     0    50    13   Male
2:     NA     2     1  0.08     0     2     0  228.98     0    50    13   Male
3:     NA     3     1  0.27     0     2     0  565.98     0    50    13   Male
4:     NA     4     1  0.49     0     2     0  791.70     0    50    13   Male
5:     NA     5     1  0.91     0     2     0  964.83     0    50    13   Male
6:     NA     6     1  1.58     0     2     0 1040.88     0    50    13   Male
     ALB   SCR    WT    HT               GENO   BSA   EGFR   DOSE SEX_v GENO_v
   <num> <num> <num> <num>             <fctr> <num>  <num> <fctr> <int>  <int>
1:   4.3   0.8  55.9 168.2 Normal metabolizer  1.63 143.18  50 mg     1      0
2:   4.3   0.8  55.9 168.2 Normal metabolizer  1.63 143.18  50 mg     1      0
3:   4.3   0.8  55.9 168.2 Normal metabolizer  1.63 143.18  50 mg     1      0
4:   4.3   0.8  55.9 168.2 Normal metabolizer  1.63 143.18  50 mg     1      0
5:   4.3   0.8  55.9 168.2 Normal metabolizer  1.63 143.18  50 mg     1      0
6:   4.3   0.8  55.9 168.2 Normal metabolizer  1.63 143.18  50 mg     1      0
   DOSE_v   TAD  LDOS   OCC
    <int> <num> <num> <num>
1:     50  0.00    50     1
2:     50  0.08    50     1
3:     50  0.27    50     1
4:     50  0.49    50     1
5:     50  0.91    50     1
6:     50  1.58    50     1
count(full, OCC)
     OCC     n
   <num> <int>
1:     1  3435

1 Get Started

1.1 Basic inventory of the data

  • Include all observation records (EVID==0) and any BLQ records.
  • I’m stratifying (or splitting) the data by all the categorical covariates I see in the data.
  • We also get the inventory for all the data.
data <- filter(full, EVID==0)

pt_inventory_long(
  data, 
  cols = c("DOSE", "GENO", "SEX"), 
  table = table
) %>% st_as_image()

1.2 Make a simple plot

ggplot(data, aes(TIME, DV)) + geom_point()

1.3 Customize the plot

  • I like to update the x-axis tick labels to be more intuitive
ggplot(data, aes(TIME, DV)) + geom_point() + 
  scale_x_continuous(breaks = seq(0,52,4))

  • And we frequently would rater see this on log scale
#| fig-width: 7
#| fig-height: 5
ggplot(data, aes(TIME, DV)) + 
  geom_point() + 
  scale_x_continuous(breaks = seq(0,52,4)) + 
  scale_y_log10()

  • I like to have a tick label above the highest observation
ggplot(data, aes(TIME, DV)) + 
  geom_point() + 
  scale_x_continuous(breaks = seq(0,52,4)) + 
  scale_y_log10(breaks = logbr3(), limits = c(NA, 10000))

  • Let’s drop the BLQ observations
obs <- filter(data, BLQ==0)

ggplot(obs, aes(TIME, DV)) + 
  geom_point() + 
  scale_x_continuous(breaks = seq(0,52,4)) + 
  scale_y_log10(breaks = logbr3(), limits = c(30, 10000))

  • Let’s give this proper labels
ggplot(obs, aes(TIME, DV)) + 
  geom_point() + 
  scale_x_continuous(breaks = seq(0,52,4)) + 
  scale_y_log10(breaks = logbr3(), limits = c(30, 10000)) + 
  labs(x = "Time (h)", y = "Demothizine concentration (ng/mL)")

1.4 Do you want to know more about absorption?

Make a view specific for that question

abs <- filter(obs, TIME < 8)
ggplot(abs, aes(TIME, DV)) + 
  geom_point() + 
  scale_x_continuous(breaks = seq(0,8,2)) + 
  scale_y_log10(breaks = logbr3(), limits = c(30, 10000)) + 
  labs(x = "Time (h)", y = "Demothizine concentration (ng/mL)")

2 Dig deeper into the covariates

  • First, get a data set with baseline covariates
id <- 
  obs %>% 
  slice(1, .by = ID) %>% 
  mutate(across(c(SEX, DOSE, GENO), ~ factor(.x)))

2.1 Covariates - tabular summary

pt_demographics(
  id, 
  cols_cat = c("SEX", "GENO"), 
  cols_cont = c("ALB", "WT", "AGE", "EGFR"),
  span = c("Demothizine dose" = "DOSE"), 
  table = table
) %>% st_as_image()

pt_demographics(
  id, 
  cols_cat = c("SEX", "DOSE"), 
  cols_cont = c("ALB", "WT", "AGE", "EGFR"),
  span = c(Genotype = "GENO"), 
  table = table
) %>% st_as_image()

2.2 Covariates - graphical summary

pl <- list_plot_x(
  id,
  x = c("SEX", "DOSE", "GENO"), 
  y = c("ALB", "WT", "BSA", "EGFR"), 
  .fun = cont_cat
)
pl <- lapply(pl, pm_grid, ncol = 2)
pl
$SEX


$DOSE


$GENO

  • What’s the difference with these plots?
pl <- list_plot_y(
  id,
  x = c("SEX", "DOSE", "GENO"), 
  y = c("ALB", "WT", "BSA", "EGFR"), 
  .fun = cont_cat
)
pl <- lapply(pl, pm_grid, ncol = 2)
pl
$ALB


$WT


$BSA


$EGFR

2.3 Covariates - correlations

y <- 
  spec %>% 
  ys_namespace("plot") %>% 
  axis_col_labs(vars(ALB, WT, AGE, EGFR))

pairs_plot(id, y = y)

3 How does PK differ across different covariates?

3.1 By dose

ggplot(obs, aes(TIME, DV)) + 
  geom_point() + 
  scale_x_continuous(breaks = seq(0,52,4)) + 
  scale_y_log10(breaks = logbr3(), limits = c(30, 10000)) + 
  labs(x = "Time (h)", y = "Demothizine concentration (ng/mL)") + 
  facet_grid(~DOSE)

  • I wouldn’t rely on color to separate the data
ggplot(obs, aes(TIME, DV, col = factor(DOSE))) + 
  geom_point() + 
  scale_x_continuous(breaks = seq(0,52,4)) + 
  scale_y_log10(breaks = logbr3(), limits = c(30, 10000)) + 
  labs(x = "Time (h)", y = "Demothizine concentration (ng/mL)")

3.2 By genotype

ggplot(obs, aes(TIME, DV)) + 
  geom_point() + 
  scale_x_continuous(breaks = seq(0,52,4)) + 
  scale_y_log10(breaks = logbr3(), limits = c(30, 10000)) + 
  labs(x = "Time (h)", y = "Demothizine concentration (ng/mL)") + 
  facet_grid(DOSE~GENO)

ggplot(obs, aes(TIME, DV)) + 
  geom_point() + 
  scale_x_continuous(breaks = seq(0,52,4)) + 
  scale_y_log10(breaks = logbr3(), limits = c(30, 10000)) + 
  geom_hline(yintercept = c(300, 1000, 3000), lty = 2) + 
  labs(x = "Time (h)", y = "Demothizine concentration (ng/mL)") + 
  facet_grid(DOSE~GENO)

3.3 Try boxplots at a nominal time point for categorical covariates

cmin <- filter(data, TIME > 20 & TIME < 25)

ggplot(cmin) + 
  geom_boxplot(aes(x = factor(GENO), y = DV)) + 
  scale_y_log10() + 
  facet_wrap(~DOSE) + 
  labs(y = "Demothizine concentration (ng/mL)", x = "Genotype")

3.3.1 Do we see the same by SEX?

ggplot(cmin) + 
  geom_boxplot(aes(x = factor(SEX), y = DV)) + 
  scale_y_log10() + 
  facet_wrap(~DOSE) + 
  labs(y = "Demothizine concentration (ng/mL)", x = "Sex")

3.4 What about continuous?

ggplot(cmin, aes(x = EGFR, y = DV)) + 
  geom_point() + 
  geom_smooth()

xx <- axis_col_labs(spec, vars(EGFR, ALB, WT, AGE))

p <- cont_cont(
  cmin, 
  x = xx, 
  y = "DV//Demothizine Cmin (ng/mL)", 
  ys = list(limits = c(0, NA))
) 

p <- lapply(p, layer_s)

pm_grid(p) + plot_layout(axes = "collect")